Set working directory

As a standard I always start out by setting my working directory to the current folder:

# Setting working directory
# NOTE(review): this is a hard-coded absolute path that only exists on the
# author's machine; the relative paths used further down (e.g. "Gapminder_data/...")
# resolve against it. Consider an RStudio project or here::here() instead -- TODO confirm.
setwd("/Users/matilde/Desktop/AU/Cultural Data Science/R/CDS2020_1")

Get the necessary packages

First, start with installing the relevant packages ‘tidyverse’, ‘gganimate’, and ‘gapminder’.

#Q7 Come up with a question you want to answer using the gapminder data and write it down. Then, create a data visualisation that answers the question and explain how your visualization answers the question. (Example: you wish to see what was mean life expectancy across the continents in the year you were born versus your parents’ birth years). [hint: if you wish to have more data than is in the filtered gapminder, you can load either the gapminder_unfiltered dataset and download more at https://www.gapminder.org/data/ ]

#Gapminder research question: Does pollution go up as people get more money? Does CO2 emission per capita rise with income per capita?

# Load the extended (unfiltered) version of the gapminder data.
gapminder2 <- gapminder_unfiltered
# Load the CO2 emission data (tonnes per person). The file has a `country`
# column and otherwise numeric columns, which are treated as years below.
co2_emissions_tonnes_per_person <- read_csv("Gapminder_data/co2_emissions_tonnes_per_person.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   country = col_character()
## )
## See spec(...) for full column specifications.
# Reshape the data frame into long format: one row per country and year.
# NOTE(review): country_list is not used anywhere in this section.
country_list <- co2_emissions_tonnes_per_person$country
# Every column except `country` holds the values for one year.
column_list <- colnames(co2_emissions_tonnes_per_person)
column_list <- column_list[column_list != 'country']
# pivot_longer() replaces the superseded gather(); all_of() makes it explicit
# that column_list is an external character vector of column names, which
# avoids the tidyselect "external vector is ambiguous" note.
co2_emissions_tonnes_per_person <- co2_emissions_tonnes_per_person %>%
  pivot_longer(
    cols = all_of(column_list),
    names_to = "year",
    values_to = "co2_emission"
  )
# Merge the two datasets on year and country (only matching rows are kept).
gapminder_co2 <- merge(gapminder2, co2_emissions_tonnes_per_person, by = c('year', 'country'))

# Omit rows where the CO2 emission is missing.
gapminder_co2_complete <- drop_na(gapminder_co2, co2_emission)

Answering the question with an animated object

# Generate the animation object for the whole world: GDP per capita (x)
# against CO2 emission per capita (y), both on log10 scales, with point size
# mapped to population and colour to continent. One animation state per year.
anim_co2 <- ggplot(gapminder_co2_complete, aes(gdpPercap, co2_emission, size = pop, color = continent)) +
  geom_point() +
  # `labels` spelled out (no partial argument matching) and scales::comma
  # qualified, consistent with the axis scales below.
  scale_size_continuous(labels = scales::comma) +
  scale_x_log10(labels = scales::comma) +  # convert x to log scale
  scale_y_log10(labels = scales::comma) +  # convert y to log scale
  labs(title = "Does pollution rise as people become richer?",
       subtitle = "The whole world",
       x = 'Gross domestic product (GDP) per capita',
       y = 'CO2 emission in tons per capita',
       size = 'Population',
       color = 'Continent',
       caption = 'Data source: www.gapminder.org/data/') +
  # Large, faint year label in the lower-left corner of the panel. It is
  # anchored on the plotted variables; the smallest *positive* emission is used
  # because a zero cannot be placed on the log10 y axis.
  geom_text(aes(x = min(gdpPercap), y = min(co2_emission[co2_emission > 0]), label = as.factor(year)),
            hjust = 0, vjust = 0, alpha = 0.2, col = "gray", size = 20) +
  transition_states(as.factor(year), state_length = 0)
anim_co2
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 2 rows containing missing values (geom_point).
# Looking at Europe only: the same scatter plot restricted to European
# countries, coloured and labelled per country (log10 x axis, linear y axis).
europe <- gapminder_co2_complete %>%
  filter(continent == "Europe") %>%
  ggplot(aes(gdpPercap, co2_emission, size = pop, color = country)) +
  geom_point() +
  # Country names next to the points; hjust/vjust/nudge_x adjust the placement.
  geom_text(aes(label = country), hjust = 0.1, vjust = -0.9, nudge_x = -.04) +
  theme(legend.position = "none") + # hides the per-country legend
  # `labels` spelled out (no partial argument matching) and scales::comma
  # qualified, consistent with the x scale below.
  scale_size_continuous(labels = scales::comma) +
  scale_x_log10(labels = scales::comma) +  # convert x to log scale
  labs(title = "Does pollution rise as people become richer?",
       subtitle = "Only Europe",
       x = 'Gross domestic product (GDP) per capita',
       y = 'CO2 emission in tons per capita',
       size = 'Population',
       caption = 'Data source: www.gapminder.org/data/') +
  # Large, faint year label anchored at the smallest x and y values.
  geom_text(aes(x = min(gdpPercap), y = min(co2_emission), label = as.factor(year)), hjust = 0.1, vjust = -9, alpha = 0.2, col = "gray", size = 20) +
  transition_states(as.factor(year), state_length = 0)
europe

Looking at Europe, it seems that some countries are worse than others. I want to look at how the worst ones compare to each other by animating a bar chart race, inspired by the tutorial mentioned earlier:

## Bar chart race
# Filter the 15 European countries with the highest CO2 emission per capita in
# each year. Rows with a missing emission are removed before ranking so they
# cannot take up a place in the top 15 in years where few countries have data.
gapminder_europe_most_co2 <- gapminder_co2 %>%
  filter(continent == "Europe", !is.na(co2_emission)) %>%
  group_by(year) %>%
  arrange(year, desc(co2_emission)) %>%
  mutate(ranking = row_number()) %>% # rank within each year, 1 = highest emission
  filter(ranking <= 15)
head(gapminder_europe_most_co2)
## # A tibble: 6 x 8
## # Groups:   year [1]
##    year country     continent lifeExp    pop gdpPercap co2_emission ranking
##   <int> <fct>       <fct>       <dbl>  <int>     <dbl>        <dbl>   <int>
## 1  1950 Luxembourg  Europe       65.7 2.96e5    14555.        25.1        1
## 2  1950 United Kin… Europe       69.0 5.01e7     9767.         9.9        2
## 3  1950 Belgium     Europe       66.4 8.64e6     7990.         8.83       3
## 4  1950 Germany     Europe       66.5 6.84e7     6090.         7.31       4
## 5  1950 Czech Repu… Europe       64.4 8.93e6     6691.         6.51       5
## 6  1950 Slovak Rep… Europe       60.9 3.46e6     4938.         5.39       6
# Create the bar chart race: one bar per ranked country, flipped to horizontal
# with rank 1 at the top, animated over the years.
anim_race <- gapminder_europe_most_co2 %>%
  ggplot() +
  geom_col(aes(ranking, co2_emission, fill = country)) +
  # The emission is the y aesthetic; after coord_flip() it is drawn on the
  # horizontal axis, so its title has to be set through `y`. (A title set
  # through `x` belongs to the ranking axis, whose title is blanked below.)
  labs(title = "Bar race of top ranked CO2 emission",
       subtitle = "Only Europe",
       y = 'CO2 emission in tons per capita',
       caption = 'Data source: www.gapminder.org/data/') +
  # NOTE(review): this prints the constant text 'CO2 emission' at the end of
  # every bar; if the value was intended, map `label` to co2_emission -- confirm.
  geom_text(aes(ranking, co2_emission, label = 'CO2 emission'), hjust = -0.1) +
  # Country name just outside the start of each bar.
  geom_text(aes(ranking, y = 0, label = country), hjust = 1.1) +
  # Large, faint year label at the last rank and the highest emission value.
  geom_text(aes(x = 15, y = max(co2_emission), label = as.factor(year)), vjust = 0.2, alpha = 0.5, col = "gray", size = 20) +
  # clip = "off" lets the country names be drawn outside the panel area.
  coord_flip(clip = "off", expand = FALSE) + scale_x_reverse() +
  theme_minimal() + theme(
    panel.grid = element_blank(),
    legend.position = "none",
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    plot.margin = margin(1, 4, 1, 3, "cm") # extra room for the text outside the panel
  ) +
  transition_states(year, state_length = 0, transition_length = 6) +
  enter_fade() +
  exit_fade() +
  ease_aes('quadratic-in-out')
anim_race

It looks like data is missing for some countries in some years, which looks strange in the animations, so I will try to create an animation in which only the countries with a full dataset appear.

# Keep only the countries with a full dataset, i.e. as many rows as there are
# distinct years in the data (this replaces the hard-coded 58).
# Assumes one row per country and year -- TODO confirm.
n_years <- length(unique(gapminder_co2_complete$year))
complete_countries <- gapminder_co2_complete %>%
  group_by(country) %>%
  filter(n() == n_years) %>%
  ungroup() # drop the grouping so later steps are not silently grouped
# The countries that have complete data
complete_country_list <- unique(complete_countries$country)
# Looking at which countries remain
complete_country_list
##  [1] Czech Republic  Denmark         Finland         Iceland        
##  [5] Japan           Netherlands     Norway          Portugal       
##  [9] Slovak Republic Spain           Sweden          Switzerland    
## 187 Levels: Afghanistan Albania Algeria Angola Argentina Armenia ... Zimbabwe

It looks like only 12 countries are left…

I will try to make an animation of the countries which are left.

# Animated scatter plot for the countries with complete data: GDP per capita
# (log10 x axis) against CO2 emission per capita, one colour and one text
# label per country, one animation state per year.
anim_co2_complete <-
  complete_countries %>%
  ggplot(aes(gdpPercap, co2_emission, color = country)) +
  theme_minimal() +
  geom_point() +
  # Country names next to the points instead of a legend.
  geom_text(aes(label = country), hjust = -0.1, vjust = -0.6, nudge_x = -.04) +
  # No size scale here: no size aesthetic is mapped in this plot.
  scale_x_log10(labels = scales::comma) +  # convert x to log scale
  #scale_y_log10(labels = scales::comma) +
  theme(legend.position = "none") +
  # The number of countries in the subtitle is computed from the data so it
  # cannot drift away from what is actually plotted.
  labs(title = "Does pollution rise as people become richer?",
       subtitle = paste("The", length(unique(complete_countries$country)), "remaining countries"),
       x = 'Gross domestic product (GDP) per capita',
       y = 'CO2 emission in tons per capita',
       caption = 'Data source: www.gapminder.org/data/') +
  # Large, faint year label anchored at the smallest x and y values.
  geom_text(aes(x = min(gdpPercap), y = min(co2_emission), label = as.factor(year)), hjust = 0.1, vjust = -7, alpha = 0.2, col = "gray", size = 20) +
  transition_states(as.factor(year), state_length = 0)
anim_co2_complete

Even though it looks nicer, it is not very representative, and I don’t know what to say about the patterns in general.

So my conclusion to the question is that Luxembourg is by far the most polluting country, but also quite rich, so there might be a correlation here. But since there is a lot of missing data, it is hard to say much about the general pattern, even though it quite clearly seems that CO2 emission and income follow each other quite linearly and that both have gone up quite a lot from 1950 to 2007.